adding a basic RSS agent

Andrew Cantino 9 anos atrás
pai
commit
f4df522f2f

+ 1 - 0
Gemfile

@@ -72,6 +72,7 @@ gem 'em-http-request', '~> 1.1.2'
72 72
 gem 'weibo_2', '~> 0.1.4'
73 73
 gem 'hipchat', '~> 1.2.0'
74 74
 gem 'xmpp4r',  '~> 0.5.6'
75
+gem 'feed-normalizer'
75 76
 gem 'slack-notifier', '~> 0.5.0'
76 77
 
77 78
 gem 'therubyracer', '~> 0.12.1'

+ 6 - 0
Gemfile.lock

@@ -116,6 +116,9 @@ GEM
116 116
       multipart-post (>= 1.2, < 3)
117 117
     faraday_middleware (0.9.1)
118 118
       faraday (>= 0.7.4, < 0.10)
119
+    feed-normalizer (1.5.2)
120
+      hpricot (>= 0.6)
121
+      simple-rss (>= 1.1)
119 122
     ffi (1.9.3)
120 123
     forecast_io (2.0.0)
121 124
       faraday
@@ -144,6 +147,7 @@ GEM
144 147
     hike (1.2.3)
145 148
     hipchat (1.2.0)
146 149
       httparty
150
+    hpricot (0.8.6)
147 151
     http (0.5.1)
148 152
       http_parser.rb
149 153
     http_parser.rb (0.6.0)
@@ -265,6 +269,7 @@ GEM
265 269
       faraday (>= 0.9.0.rc5)
266 270
       jwt (>= 0.1.5)
267 271
       multi_json (>= 1.0.0)
272
+    simple-rss (1.3.1)
268 273
     simple_oauth (0.2.0)
269 274
     simplecov (0.8.2)
270 275
       docile (~> 1.1.0)
@@ -359,6 +364,7 @@ DEPENDENCIES
359 364
   em-http-request (~> 1.1.2)
360 365
   faraday (~> 0.9.0)
361 366
   faraday_middleware
367
+  feed-normalizer
362 368
   forecast_io (~> 2.0.0)
363 369
   foreman (~> 0.63.0)
364 370
   geokit (~> 1.8.4)

+ 61 - 0
app/concerns/web_request_concern.rb

@@ -0,0 +1,61 @@
1
+module WebRequestConcern
2
+  extend ActiveSupport::Concern
3
+
4
+  def validate_web_request_options!
5
+    if options['user_agent'].present?
6
+      errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
7
+    end
8
+
9
+    unless headers(options['headers']).is_a?(Hash)
10
+      errors.add(:base, "if provided, headers must be a hash")
11
+    end
12
+
13
+    begin
14
+      basic_auth_credentials(options['basic_auth'])
15
+    rescue ArgumentError => e
16
+      errors.add(:base, e.message)
17
+    end
18
+  end
19
+
20
+  def faraday
21
+    @faraday ||= Faraday.new { |builder|
22
+      builder.headers = headers if headers.length > 0
23
+
24
+      if (user_agent = interpolated['user_agent']).present?
25
+        builder.headers[:user_agent] = user_agent
26
+      end
27
+
28
+      builder.use FaradayMiddleware::FollowRedirects
29
+      builder.request :url_encoded
30
+      if userinfo = basic_auth_credentials
31
+        builder.request :basic_auth, *userinfo
32
+      end
33
+
34
+      case backend = faraday_backend
35
+        when :typhoeus
36
+          require 'typhoeus/adapters/faraday'
37
+      end
38
+      builder.adapter backend
39
+    }
40
+  end
41
+
42
+  def headers(value = interpolated['headers'])
43
+    value.presence || {}
44
+  end
45
+
46
+  def basic_auth_credentials(value = interpolated['basic_auth'])
47
+    case value
48
+      when nil, ''
49
+        return nil
50
+      when Array
51
+        return value if value.size == 2
52
+      when /:/
53
+        return value.split(/:/, 2)
54
+    end
55
+    raise ArgumentError.new("bad value for basic_auth: #{value.inspect}")
56
+  end
57
+
58
+  def faraday_backend
59
+    ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
60
+  end
61
+end

+ 89 - 0
app/models/agents/rss_agent.rb

@@ -0,0 +1,89 @@
1
+require 'rss'
2
+require 'feed-normalizer'
3
+
4
+module Agents
5
+  class RssAgent < Agent
6
+    include WebRequestConcern
7
+
8
+    cannot_receive_events!
9
+    default_schedule "every_1d"
10
+
11
+    description do
12
+      <<-MD
13
+        This Agent consumes RSS feeds and emits events when they change.
14
+
15
+        (If you want to *output* an RSS feed, use the DataOutputAgent.  Also, you can technically parse RSS and XML feeds
16
+        with the WebsiteAgent as well.  See [this example](https://github.com/cantino/huginn/wiki/Agent-configuration-examples#itunes-trailers).)
17
+
18
+        Options:
19
+
20
+          * `url` - The URL of the RSS feed.
21
+          * `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s `clean!` method to clean up HTML in the feed.  Set to `true` to use.
22
+          * `expected_update_period_in_days` - How often you expect this RSS feed to change.  If more than this amount of time passes without an update, the Agent will mark itself as not working.
23
+      MD
24
+    end
25
+
26
+    def default_options
27
+      {
28
+        'expected_update_period_in_days' => "5",
29
+        'clean' => 'false',
30
+        'url' => "https://github.com/cantino/huginn/commits/master.atom"
31
+      }
32
+    end
33
+
34
+    def working?
35
+      event_created_within?((interpolated['expected_update_period_in_days'].presence || 10).to_i) && !recent_error_logs?
36
+    end
37
+
38
+    def validate_options
39
+      errors.add(:base, "url is required") unless options['url'].present?
40
+
41
+      unless options['expected_update_period_in_days'].present? && options['expected_update_period_in_days'].to_i > 0
42
+        errors.add(:base, "Please provide 'expected_update_period_in_days' to indicate how many days can pass without an update before this Agent is considered to not be working")
43
+      end
44
+
45
+      validate_web_request_options!
46
+    end
47
+
48
+    def check
49
+      response = faraday.get(interpolated['url'])
50
+      if response.success?
51
+        feed = FeedNormalizer::FeedNormalizer.parse(response.body)
52
+        feed.clean! if interpolated['clean'] == 'true'
53
+        created_event_count = 0
54
+        feed.entries.each do |entry|
55
+          if check_and_track(entry.id)
56
+            created_event_count += 1
57
+            create_event(:payload => {
58
+              :id => entry.id,
59
+              :date_published => entry.date_published,
60
+              :last_updated => entry.last_updated,
61
+              :urls => entry.urls,
62
+              :description => entry.description,
63
+              :content => entry.content,
64
+              :title => entry.title,
65
+              :authors => entry.authors,
66
+              :categories => entry.categories
67
+            })
68
+          end
69
+        end
70
+        log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)."
71
+      else
72
+        error "Failed to fetch #{interpolated['url']}: #{response.inspect}"
73
+      end
74
+    end
75
+
76
+    protected
77
+
78
+    def check_and_track(entry_id)
79
+      memory['seen_ids'] ||= []
80
+      if memory['seen_ids'].include?(entry_id)
81
+        false
82
+      else
83
+        memory['seen_ids'].unshift entry_id
84
+        memory['seen_ids'].pop if memory['seen_ids'].length > 500
85
+        true
86
+      end
87
+    end
88
+  end
89
+end

+ 2 - 55
app/models/agents/website_agent.rb

@@ -5,6 +5,7 @@ require 'date'
5 5
 
6 6
 module Agents
7 7
   class WebsiteAgent < Agent
8
+    include WebRequestConcern
8 9
 
9 10
     default_schedule "every_12h"
10 11
 
@@ -109,19 +110,7 @@ module Agents
109 110
         end
110 111
       end
111 112
 
112
-      if options['user_agent'].present?
113
-        errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
114
-      end
115
-
116
-      unless headers.is_a?(Hash)
117
-        errors.add(:base, "if provided, headers must be a hash")
118
-      end
119
-
120
-      begin
121
-        basic_auth_credentials()
122
-      rescue => e
123
-        errors.add(:base, e.message)
124
-      end
113
+      validate_web_request_options!
125 114
     end
126 115
 
127 116
     def check
@@ -291,47 +280,5 @@ module Agents
291 280
         false
292 281
       end
293 282
     end
294
-
295
-    def faraday
296
-      @faraday ||= Faraday.new { |builder|
297
-        builder.headers = headers if headers.length > 0
298
-
299
-        if (user_agent = interpolated['user_agent']).present?
300
-          builder.headers[:user_agent] = user_agent
301
-        end
302
-
303
-        builder.use FaradayMiddleware::FollowRedirects
304
-        builder.request :url_encoded
305
-        if userinfo = basic_auth_credentials()
306
-          builder.request :basic_auth, *userinfo
307
-        end
308
-
309
-        case backend = faraday_backend
310
-        when :typhoeus
311
-          require 'typhoeus/adapters/faraday'
312
-        end
313
-        builder.adapter backend
314
-      }
315
-    end
316
-
317
-    def faraday_backend
318
-      ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
319
-    end
320
-
321
-    def basic_auth_credentials
322
-      case value = interpolated['basic_auth']
323
-      when nil, ''
324
-        return nil
325
-      when Array
326
-        return value if value.size == 2
327
-      when /:/
328
-        return value.split(/:/, 2)
329
-      end
330
-      raise "bad value for basic_auth: #{value.inspect}"
331
-    end
332
-
333
-    def headers
334
-      interpolated['headers'].presence || {}
335
-    end
336 283
   end
337 284
 end

+ 356 - 0
spec/data_fixtures/github_rss.atom

@@ -0,0 +1,356 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
3
+  <id>tag:github.com,2008:/cantino/huginn/commits/master</id>
4
+  <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commits/master"/>
5
+  <link type="application/atom+xml" rel="self" href="https://github.com/cantino/huginn/commits/master.atom"/>
6
+  <title>Recent Commits to huginn:master</title>
7
+  <updated>2014-07-16T22:26:22-07:00</updated>
8
+  <entry>
9
+    <id>tag:github.com,2008:Grit::Commit/d0a844662846cf3c83b94c637c1803f03db5a5b0</id>
10
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"/>
11
+    <title>
12
+        Merge pull request #402 from albertsun/safer-liquid-migration
13
+    </title>
14
+    <updated>2014-07-16T22:26:22-07:00</updated>
15
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
16
+    <author>
17
+      <name>cantino</name>
18
+      <uri>https://github.com/cantino</uri>
19
+    </author>
20
+    <content type="html">
21
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #402 from albertsun/safer-liquid-migration
22
+
23
+Inline models into migration&lt;/pre>
24
+    </content>
25
+  </entry>
26
+  <entry>
27
+    <id>tag:github.com,2008:Grit::Commit/4a433806eeace44f1e39f02ac61cefdadf3597e2</id>
28
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/4a433806eeace44f1e39f02ac61cefdadf3597e2"/>
29
+    <title>
30
+        inline models into migration
31
+    </title>
32
+    <updated>2014-07-16T15:25:08-04:00</updated>
33
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/382862?s=30"/>
34
+    <author>
35
+      <name>albertsun</name>
36
+      <uri>https://github.com/albertsun</uri>
37
+    </author>
38
+    <content type="html">
39
+      &lt;pre style='white-space:pre-wrap;width:81ex'>inline models into migration&lt;/pre>
40
+    </content>
41
+  </entry>
42
+  <entry>
43
+    <id>tag:github.com,2008:Grit::Commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4</id>
44
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4"/>
45
+    <title>
46
+        Merge pull request #398 from knu/imap_use_uid
47
+    </title>
48
+    <updated>2014-07-15T19:47:37-07:00</updated>
49
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
50
+    <author>
51
+      <name>cantino</name>
52
+      <uri>https://github.com/cantino</uri>
53
+    </author>
54
+    <content type="html">
55
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #398 from knu/imap_use_uid
56
+
57
+Use &quot;last seen UID&quot; in ImapFolderAgent&lt;/pre>
58
+    </content>
59
+  </entry>
60
+  <entry>
61
+    <id>tag:github.com,2008:Grit::Commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6</id>
62
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6"/>
63
+    <title>
64
+        Merge pull request #391 from theofpa/master
65
+    </title>
66
+    <updated>2014-07-12T15:19:56-07:00</updated>
67
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
68
+    <author>
69
+      <name>cantino</name>
70
+      <uri>https://github.com/cantino</uri>
71
+    </author>
72
+    <content type="html">
73
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #391 from theofpa/master
74
+
75
+Ignore xmlns when evaluating xpath&lt;/pre>
76
+    </content>
77
+  </entry>
78
+  <entry>
79
+    <id>tag:github.com,2008:Grit::Commit/f3552ece2e9af187bd5e613783dd27810b63c32f</id>
80
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/f3552ece2e9af187bd5e613783dd27810b63c32f"/>
81
+    <title>
82
+        ImapFolderAgent: Emit a log message when creating an event or skipping it.
83
+    </title>
84
+    <updated>2014-07-11T19:19:12+09:00</updated>
85
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
86
+    <author>
87
+      <name>knu</name>
88
+      <uri>https://github.com/knu</uri>
89
+    </author>
90
+    <content type="html">
91
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Emit a log message when creating an event or skipping it.&lt;/pre>
92
+    </content>
93
+  </entry>
94
+  <entry>
95
+    <id>tag:github.com,2008:Grit::Commit/d144d3797d2db362943357c6d85238ec657cfa06</id>
96
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d144d3797d2db362943357c6d85238ec657cfa06"/>
97
+    <title>
98
+        ImapFolderAgent: Enable notification of mails already marked as read.
99
+    </title>
100
+    <updated>2014-07-11T19:08:55+09:00</updated>
101
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
102
+    <author>
103
+      <name>knu</name>
104
+      <uri>https://github.com/knu</uri>
105
+    </author>
106
+    <content type="html">
107
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Enable notification of mails already marked as read.
108
+
109
+Add a condition key &quot;is_unread&quot; to allow user to select mails based on
110
+the read status.&lt;/pre>
111
+    </content>
112
+  </entry>
113
+  <entry>
114
+    <id>tag:github.com,2008:Grit::Commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6</id>
115
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6"/>
116
+    <title>
117
+        ImapFolderAgent: &quot;conditions&quot; must not actually be nil.
118
+    </title>
119
+    <updated>2014-07-11T18:02:09+09:00</updated>
120
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
121
+    <author>
122
+      <name>knu</name>
123
+      <uri>https://github.com/knu</uri>
124
+    </author>
125
+    <content type="html">
126
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: &quot;conditions&quot; must not actually be nil.&lt;/pre>
127
+    </content>
128
+  </entry>
129
+  <entry>
130
+    <id>tag:github.com,2008:Grit::Commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d</id>
131
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d"/>
132
+    <title>
133
+        ImapFolderAgent: Do not fail when port is blank.
134
+    </title>
135
+    <updated>2014-07-11T18:02:09+09:00</updated>
136
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
137
+    <author>
138
+      <name>knu</name>
139
+      <uri>https://github.com/knu</uri>
140
+    </author>
141
+    <content type="html">
142
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Do not fail when port is blank.&lt;/pre>
143
+    </content>
144
+  </entry>
145
+  <entry>
146
+    <id>tag:github.com,2008:Grit::Commit/045fb957b2370d80190fa8dc036863076d8806fb</id>
147
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/045fb957b2370d80190fa8dc036863076d8806fb"/>
148
+    <title>
149
+        ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
150
+    </title>
151
+    <updated>2014-07-11T18:02:09+09:00</updated>
152
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
153
+    <author>
154
+      <name>knu</name>
155
+      <uri>https://github.com/knu</uri>
156
+    </author>
157
+    <content type="html">
158
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
159
+
160
+Add a utility method Agent#boolify to make it easier to handle boolean
161
+option values.&lt;/pre>
162
+    </content>
163
+  </entry>
164
+  <entry>
165
+    <id>tag:github.com,2008:Grit::Commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435</id>
166
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435"/>
167
+    <title>
168
+        ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.
169
+    </title>
170
+    <updated>2014-07-11T18:01:26+09:00</updated>
171
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
172
+    <author>
173
+      <name>knu</name>
174
+      <uri>https://github.com/knu</uri>
175
+    </author>
176
+    <content type="html">
177
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.&lt;/pre>
178
+    </content>
179
+  </entry>
180
+  <entry>
181
+    <id>tag:github.com,2008:Grit::Commit/6a06a32447721abc4477979610e36db0650e2f92</id>
182
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6a06a32447721abc4477979610e36db0650e2f92"/>
183
+    <title>
184
+        ImapFolderAgent: Only keep a single UID value for each folder in memory.
185
+    </title>
186
+    <updated>2014-07-11T18:01:26+09:00</updated>
187
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
188
+    <author>
189
+      <name>knu</name>
190
+      <uri>https://github.com/knu</uri>
191
+    </author>
192
+    <content type="html">
193
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Only keep a single UID value for each folder in memory.
194
+
195
+Previously it used to keep a list of the UIDs of unread mails.  Now we
196
+start to assume that UIDs in a folder identified by a UID VALIDITY value
197
+are strictly ascending (monotonically increasing) as suggested by RFC
198
+3501 and 4549 and just keep the highest UID seen in the last run.
199
+
200
+This enhancement will help reduce the size of memory typically where
201
+mails are left unread forever.&lt;/pre>
202
+    </content>
203
+  </entry>
204
+  <entry>
205
+    <id>tag:github.com,2008:Grit::Commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876</id>
206
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876"/>
207
+    <title>
208
+        Merge pull request #397 from cantino/update_rails_and_gems
209
+    </title>
210
+    <updated>2014-07-05T16:34:29-07:00</updated>
211
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
212
+    <author>
213
+      <name>cantino</name>
214
+      <uri>https://github.com/cantino</uri>
215
+    </author>
216
+    <content type="html">
217
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #397 from cantino/update_rails_and_gems
218
+
219
+upgrade rails and gems&lt;/pre>
220
+    </content>
221
+  </entry>
222
+  <entry>
223
+    <id>tag:github.com,2008:Grit::Commit/87a7abda23a82305d7050ac0bb400ce36c863d01</id>
224
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"/>
225
+    <title>
226
+        upgrade rails and gems
227
+    </title>
228
+    <updated>2014-07-05T08:01:36-07:00</updated>
229
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
230
+    <author>
231
+      <name>cantino</name>
232
+      <uri>https://github.com/cantino</uri>
233
+    </author>
234
+    <content type="html">
235
+      &lt;pre style='white-space:pre-wrap;width:81ex'>upgrade rails and gems&lt;/pre>
236
+    </content>
237
+  </entry>
238
+  <entry>
239
+    <id>tag:github.com,2008:Grit::Commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a</id>
240
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a"/>
241
+    <title>
242
+        Merge pull request #396 from knu/show_propagate_immediately
243
+    </title>
244
+    <updated>2014-07-03T20:50:40-07:00</updated>
245
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
246
+    <author>
247
+      <name>cantino</name>
248
+      <uri>https://github.com/cantino</uri>
249
+    </author>
250
+    <content type="html">
251
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #396 from knu/show_propagate_immediately
252
+
253
+Make propagate_immediately more visible in agent details and the diagram.&lt;/pre>
254
+    </content>
255
+  </entry>
256
+  <entry>
257
+    <id>tag:github.com,2008:Grit::Commit/0e80f5341587aace2c023b06eb9265b776ac4535</id>
258
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"/>
259
+    <title>
260
+        Dashed line in a diagram indicates propagate_immediately being false.
261
+    </title>
262
+    <updated>2014-07-04T03:42:52+09:00</updated>
263
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
264
+    <author>
265
+      <name>knu</name>
266
+      <uri>https://github.com/knu</uri>
267
+    </author>
268
+    <content type="html">
269
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Dashed line in a diagram indicates propagate_immediately being false.&lt;/pre>
270
+    </content>
271
+  </entry>
272
+  <entry>
273
+    <id>tag:github.com,2008:Grit::Commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186</id>
274
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186"/>
275
+    <title>
276
+        Show the propagate_immediately flag in agent details.
277
+    </title>
278
+    <updated>2014-07-04T02:53:31+09:00</updated>
279
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
280
+    <author>
281
+      <name>knu</name>
282
+      <uri>https://github.com/knu</uri>
283
+    </author>
284
+    <content type="html">
285
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Show the propagate_immediately flag in agent details.&lt;/pre>
286
+    </content>
287
+  </entry>
288
+  <entry>
289
+    <id>tag:github.com,2008:Grit::Commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d</id>
290
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d"/>
291
+    <title>
292
+        Merge pull request #389 from dsander/silence_worker_status
293
+    </title>
294
+    <updated>2014-07-01T21:47:40-07:00</updated>
295
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
296
+    <author>
297
+      <name>cantino</name>
298
+      <uri>https://github.com/cantino</uri>
299
+    </author>
300
+    <content type="html">
301
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #389 from dsander/silence_worker_status
302
+
303
+Supress logging for requests to the /worker_status&lt;/pre>
304
+    </content>
305
+  </entry>
306
+  <entry>
307
+    <id>tag:github.com,2008:Grit::Commit/d25e670b1c040f78eb648120c117853421d522c3</id>
308
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d25e670b1c040f78eb648120c117853421d522c3"/>
309
+    <title>
310
+        Merge pull request #393 from CloCkWeRX/google_calendar
311
+    </title>
312
+    <updated>2014-07-01T21:47:16-07:00</updated>
313
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
314
+    <author>
315
+      <name>cantino</name>
316
+      <uri>https://github.com/cantino</uri>
317
+    </author>
318
+    <content type="html">
319
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #393 from CloCkWeRX/google_calendar
320
+
321
+Add Google calendar publish agent&lt;/pre>
322
+    </content>
323
+  </entry>
324
+  <entry>
325
+    <id>tag:github.com,2008:Grit::Commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4</id>
326
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4"/>
327
+    <title>
328
+        switch to cantino-twitter-stream
329
+    </title>
330
+    <updated>2014-07-01T21:36:38-07:00</updated>
331
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
332
+    <author>
333
+      <name>cantino</name>
334
+      <uri>https://github.com/cantino</uri>
335
+    </author>
336
+    <content type="html">
337
+      &lt;pre style='white-space:pre-wrap;width:81ex'>switch to cantino-twitter-stream&lt;/pre>
338
+    </content>
339
+  </entry>
340
+  <entry>
341
+    <id>tag:github.com,2008:Grit::Commit/d465158f77dcd9078697e6167b50abbfdfa8b1af</id>
342
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"/>
343
+    <title>
344
+        Shift to dev group
345
+    </title>
346
+    <updated>2014-07-01T16:37:47+09:30</updated>
347
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/365751?s=30"/>
348
+    <author>
349
+      <name>CloCkWeRX</name>
350
+      <uri>https://github.com/CloCkWeRX</uri>
351
+    </author>
352
+    <content type="html">
353
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Shift to dev group&lt;/pre>
354
+    </content>
355
+  </entry>
356
+</feed>

+ 81 - 0
spec/models/agents/rss_agent_spec.rb

@@ -0,0 +1,81 @@
1
+require 'spec_helper'
2
+
3
+describe Agents::RssAgent do
4
+  before do
5
+    @valid_options = {
6
+      'expected_update_period_in_days' => "2",
7
+      'url' => "https://github.com/cantino/huginn/commits/master.atom",
8
+    }
9
+
10
+    stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200)
11
+  end
12
+
13
+  let(:agent) do
14
+    _agent = Agents::RssAgent.new(:name => "github rss feed", :options => @valid_options)
15
+    _agent.user = users(:bob)
16
+    _agent.save!
17
+    _agent
18
+  end
19
+
20
+  it_behaves_like WebRequestConcern
21
+
22
+  describe "validations" do
23
+    it "should validate the presence of url" do
24
+      agent.options['url'] = "http://google.com"
25
+      agent.should be_valid
26
+
27
+      agent.options['url'] = ""
28
+      agent.should_not be_valid
29
+
30
+      agent.options['url'] = nil
31
+      agent.should_not be_valid
32
+    end
33
+
34
+    it "should validate the presence and numericality of expected_update_period_in_days" do
35
+      agent.options['expected_update_period_in_days'] = "5"
36
+      agent.should be_valid
37
+
38
+      agent.options['expected_update_period_in_days'] = "wut?"
39
+      agent.should_not be_valid
40
+
41
+      agent.options['expected_update_period_in_days'] = 0
42
+      agent.should_not be_valid
43
+
44
+      agent.options['expected_update_period_in_days'] = nil
45
+      agent.should_not be_valid
46
+
47
+      agent.options['expected_update_period_in_days'] = ""
48
+      agent.should_not be_valid
49
+    end
50
+  end
51
+
52
+  describe "emitting RSS events" do
53
+    it "should emit items as events" do
54
+      lambda {
55
+        agent.check
56
+      }.should change { agent.events.count }.by(20)
57
+    end
58
+
59
+    it "should track ids and not re-emit the same item when seen again" do
60
+      agent.check
61
+      agent.memory['seen_ids'].should == agent.events.map {|e| e.payload['id'] }
62
+
63
+      newest_id = agent.memory['seen_ids'][0]
64
+      agent.events.first.payload['id'].should == newest_id
65
+      agent.memory['seen_ids'] = agent.memory['seen_ids'][1..-1] # forget the newest id
66
+
67
+      lambda {
68
+        agent.check
69
+      }.should change { agent.events.count }.by(1)
70
+
71
+      agent.events.first.payload['id'].should == newest_id
72
+      agent.memory['seen_ids'][0].should == newest_id
73
+    end
74
+
75
+    it "should truncate the seen_ids in memory at 500 items" do
76
+      agent.memory['seen_ids'] = ['x'] * 490
77
+      agent.check
78
+      agent.memory['seen_ids'].length.should == 500
79
+    end
80
+  end
81
+end

+ 39 - 51
spec/models/agents/website_agent_spec.rb

@@ -4,9 +4,9 @@ describe Agents::WebsiteAgent do
4 4
   describe "checking without basic auth" do
5 5
     before do
6 6
       stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
7
-      @site = {
7
+      @valid_options = {
8 8
         'name' => "XKCD",
9
-        'expected_update_period_in_days' => 2,
9
+        'expected_update_period_in_days' => "2",
10 10
         'type' => "html",
11 11
         'url' => "http://xkcd.com",
12 12
         'mode' => 'on_change',
@@ -16,11 +16,13 @@ describe Agents::WebsiteAgent do
16 16
           'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
17 17
         }
18 18
       }
19
-      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site, :keep_events_for => 2)
19
+      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2)
20 20
       @checker.user = users(:bob)
21 21
       @checker.save!
22 22
     end
23 23
 
24
+    it_behaves_like WebRequestConcern
25
+
24 26
     describe "validations" do
25 27
       before do
26 28
         @checker.should be_valid
@@ -42,20 +44,6 @@ describe Agents::WebsiteAgent do
42 44
         @checker.should be_valid
43 45
       end
44 46
 
45
-      it "should validate headers" do
46
-        @checker.options['headers'] = "blah"
47
-        @checker.should_not be_valid
48
-
49
-        @checker.options['headers'] = ""
50
-        @checker.should be_valid
51
-
52
-        @checker.options['headers'] = {}
53
-        @checker.should be_valid
54
-
55
-        @checker.options['headers'] = { 'foo' => 'bar' }
56
-        @checker.should be_valid
57
-      end
58
-
59 47
       it "should validate mode" do
60 48
         @checker.options['mode'] = "nonsense"
61 49
         @checker.should_not be_valid
@@ -97,16 +85,16 @@ describe Agents::WebsiteAgent do
97 85
 
98 86
       it "should always save events when in :all mode" do
99 87
         lambda {
100
-          @site['mode'] = 'all'
101
-          @checker.options = @site
88
+          @valid_options['mode'] = 'all'
89
+          @checker.options = @valid_options
102 90
           @checker.check
103 91
           @checker.check
104 92
         }.should change { Event.count }.by(2)
105 93
       end
106 94
 
107 95
       it "should take uniqueness_look_back into account during deduplication" do
108
-        @site['mode'] = 'all'
109
-        @checker.options = @site
96
+        @valid_options['mode'] = 'all'
97
+        @checker.options = @valid_options
110 98
         @checker.check
111 99
         @checker.check
112 100
         event = Event.last
@@ -114,47 +102,47 @@ describe Agents::WebsiteAgent do
114 102
         event.save
115 103
 
116 104
         lambda {
117
-          @site['mode'] = 'on_change'
118
-          @site['uniqueness_look_back'] = 2
119
-          @checker.options = @site
105
+          @valid_options['mode'] = 'on_change'
106
+          @valid_options['uniqueness_look_back'] = 2
107
+          @checker.options = @valid_options
120 108
           @checker.check
121 109
         }.should_not change { Event.count }
122 110
 
123 111
         lambda {
124
-          @site['mode'] = 'on_change'
125
-          @site['uniqueness_look_back'] = 1
126
-          @checker.options = @site
112
+          @valid_options['mode'] = 'on_change'
113
+          @valid_options['uniqueness_look_back'] = 1
114
+          @checker.options = @valid_options
127 115
           @checker.check
128 116
         }.should change { Event.count }.by(1)
129 117
       end
130 118
 
131 119
       it "should log an error if the number of results for a set of extraction patterns differs" do
132
-        @site['extract']['url']['css'] = "div"
133
-        @checker.options = @site
120
+        @valid_options['extract']['url']['css'] = "div"
121
+        @checker.options = @valid_options
134 122
         @checker.check
135 123
         @checker.logs.first.message.should =~ /Got an uneven number of matches/
136 124
       end
137 125
 
138 126
       it "should accept an array for url" do
139
-        @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
140
-        @checker.options = @site
127
+        @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
128
+        @checker.options = @valid_options
141 129
         lambda { @checker.save! }.should_not raise_error;
142 130
         lambda { @checker.check }.should_not raise_error;
143 131
       end
144 132
 
145 133
       it "should parse events from all urls in array" do
146 134
         lambda {
147
-          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
148
-          @site['mode'] = 'all'
149
-          @checker.options = @site
135
+          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
136
+          @valid_options['mode'] = 'all'
137
+          @checker.options = @valid_options
150 138
           @checker.check
151 139
         }.should change { Event.count }.by(2)
152 140
       end
153 141
 
154 142
       it "should follow unique rules when parsing array of urls" do
155 143
         lambda {
156
-          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
157
-          @checker.options = @site
144
+          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
145
+          @checker.options = @valid_options
158 146
           @checker.check
159 147
         }.should change { Event.count }.by(1)
160 148
       end
@@ -170,7 +158,7 @@ describe Agents::WebsiteAgent do
170 158
           }, :status => 200)
171 159
         site = {
172 160
           'name' => "Some JSON Response",
173
-          'expected_update_period_in_days' => 2,
161
+          'expected_update_period_in_days' => "2",
174 162
           'type' => "json",
175 163
           'url' => "http://no-encoding.example.com",
176 164
           'mode' => 'on_change',
@@ -197,7 +185,7 @@ describe Agents::WebsiteAgent do
197 185
           }, :status => 200)
198 186
         site = {
199 187
           'name' => "Some JSON Response",
200
-          'expected_update_period_in_days' => 2,
188
+          'expected_update_period_in_days' => "2",
201 189
           'type' => "json",
202 190
           'url' => "http://wrong-encoding.example.com",
203 191
           'mode' => 'on_change',
@@ -248,11 +236,11 @@ describe Agents::WebsiteAgent do
248 236
       end
249 237
 
250 238
       it "parses XPath" do
251
-        @site['extract'].each { |key, value|
239
+        @valid_options['extract'].each { |key, value|
252 240
           value.delete('css')
253 241
           value['xpath'] = "//*[@id='comic']//img"
254 242
         }
255
-        @checker.options = @site
243
+        @checker.options = @valid_options
256 244
         @checker.check
257 245
         event = Event.last
258 246
         event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
@@ -263,7 +251,7 @@ describe Agents::WebsiteAgent do
263 251
       it "should turn relative urls to absolute" do
264 252
         rel_site = {
265 253
           'name' => "XKCD",
266
-          'expected_update_period_in_days' => 2,
254
+          'expected_update_period_in_days' => "2",
267 255
           'type' => "html",
268 256
           'url' => "http://xkcd.com",
269 257
           'mode' => "on_change",
@@ -291,7 +279,7 @@ describe Agents::WebsiteAgent do
291 279
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
292 280
           site = {
293 281
             'name' => "Some JSON Response",
294
-            'expected_update_period_in_days' => 2,
282
+            'expected_update_period_in_days' => "2",
295 283
             'type' => "json",
296 284
             'url' => "http://json-site.com",
297 285
             'mode' => 'on_change',
@@ -322,7 +310,7 @@ describe Agents::WebsiteAgent do
322 310
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
323 311
           site = {
324 312
             'name' => "Some JSON Response",
325
-            'expected_update_period_in_days' => 2,
313
+            'expected_update_period_in_days' => "2",
326 314
             'type' => "json",
327 315
             'url' => "http://json-site.com",
328 316
             'mode' => 'on_change',
@@ -358,7 +346,7 @@ describe Agents::WebsiteAgent do
358 346
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
359 347
           site = {
360 348
             'name' => "Some JSON Response",
361
-            'expected_update_period_in_days' => 2,
349
+            'expected_update_period_in_days' => "2",
362 350
             'type' => "json",
363 351
             'url' => "http://json-site.com",
364 352
             'mode' => 'on_change'
@@ -382,7 +370,7 @@ describe Agents::WebsiteAgent do
382 370
         @event.payload = { 'url' => "http://xkcd.com" }
383 371
 
384 372
         lambda {
385
-          @checker.options = @site
373
+          @checker.options = @valid_options
386 374
           @checker.receive([@event])
387 375
         }.should change { Event.count }.by(1)
388 376
       end
@@ -394,9 +382,9 @@ describe Agents::WebsiteAgent do
394 382
       stub_request(:any, /example/).
395 383
         with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
396 384
         to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
397
-      @site = {
385
+      @valid_options = {
398 386
         'name' => "XKCD",
399
-        'expected_update_period_in_days' => 2,
387
+        'expected_update_period_in_days' => "2",
400 388
         'type' => "html",
401 389
         'url' => "http://www.example.com",
402 390
         'mode' => 'on_change',
@@ -407,7 +395,7 @@ describe Agents::WebsiteAgent do
407 395
         },
408 396
         'basic_auth' => "user:pass"
409 397
       }
410
-      @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @site)
398
+      @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
411 399
       @checker.user = users(:bob)
412 400
       @checker.save!
413 401
     end
@@ -425,9 +413,9 @@ describe Agents::WebsiteAgent do
425 413
       stub_request(:any, /example/).
426 414
         with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ }).
427 415
         to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
428
-      @site = {
416
+      @valid_options = {
429 417
         'name' => "XKCD",
430
-        'expected_update_period_in_days' => 2,
418
+        'expected_update_period_in_days' => "2",
431 419
         'type' => "html",
432 420
         'url' => "http://www.example.com",
433 421
         'mode' => 'on_change',
@@ -436,7 +424,7 @@ describe Agents::WebsiteAgent do
436 424
           'url' => { 'css' => "#comic img", 'attr' => "src" },
437 425
         }
438 426
       }
439
-      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site)
427
+      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
440 428
       @checker.user = users(:bob)
441 429
       @checker.save!
442 430
     end

+ 66 - 0
spec/support/shared_examples/web_request_concern.rb

@@ -0,0 +1,66 @@
1
+require 'spec_helper'
2
+
3
+shared_examples_for WebRequestConcern do
4
+  let(:agent) do
5
+    _agent = described_class.new(:name => "some agent", :options => @valid_options || {})
6
+    _agent.user = users(:jane)
7
+    _agent
8
+  end
9
+
10
+  describe "validations" do
11
+    it "should be valid" do
12
+      agent.should be_valid
13
+    end
14
+
15
+    it "should validate user_agent" do
16
+      agent.options['user_agent'] = nil
17
+      agent.should be_valid
18
+
19
+      agent.options['user_agent'] = ""
20
+      agent.should be_valid
21
+
22
+      agent.options['user_agent'] = "foo"
23
+      agent.should be_valid
24
+
25
+      agent.options['user_agent'] = ["foo"]
26
+      agent.should_not be_valid
27
+
28
+      agent.options['user_agent'] = 1
29
+      agent.should_not be_valid
30
+    end
31
+
32
+    it "should validate headers" do
33
+      agent.options['headers'] = "blah"
34
+      agent.should_not be_valid
35
+
36
+      agent.options['headers'] = ""
37
+      agent.should be_valid
38
+
39
+      agent.options['headers'] = {}
40
+      agent.should be_valid
41
+
42
+      agent.options['headers'] = { 'foo' => 'bar' }
43
+      agent.should be_valid
44
+    end
45
+
46
+    it "should validate basic_auth" do
47
+      agent.options['basic_auth'] = "foo:bar"
48
+      agent.should be_valid
49
+
50
+      agent.options['basic_auth'] = ["foo", "bar"]
51
+      agent.should be_valid
52
+
53
+      agent.options['basic_auth'] = ""
54
+      agent.should be_valid
55
+
56
+      agent.options['basic_auth'] = nil
57
+      agent.should be_valid
58
+
59
+      agent.options['basic_auth'] = "blah"
60
+      agent.should_not be_valid
61
+
62
+      agent.options['basic_auth'] = ["blah"]
63
+      agent.should_not be_valid
64
+    end
65
+  end
66
+end